This chart represents the average remote work ratio for all employees working at Company ABC. If the (ARWR) is 65%, that means 65% of all employees had some sort of remote work ability for that year.
This chart shows how all employees fall into the three remote work categories.
Note: The “Growth (%)” column represents the year-over-year percentage change in job counts per that Job Area.
---
# YAML header defining the dashboard properties:
title: "Company ABC Salary Dashboard"
output:
flexdashboard::flex_dashboard:
orientation: columns
vertical_layout: scroll
social: menu
source: embed
# CCS Styling was used below to help the interactive tbales on Page 3 be more viewable.
---
<style>.dataTables_wrapper {overflow: auto; clear: both; height: auto !important; min-height: 300px;} .dataTables_wrapper .dataTables_paginate {margin-top:0.5em;}</style>
```{r setup, include=FALSE}
## Setup chunk, Loads needed or possibly needed libraries.
library(flexdashboard)
library(ggplot2)
library(plotly)
library(DT)
library(dplyr)
library(scales)
# Setting work directory:
setwd("C:/Users/micha/OneDrive/Desktop/RStudio Class/Rstudio_Assignment_#3_(Group Assignment)")
# reading in assignment's .csv file with relative path and changing its name to help preserve the original
in_progress_ds_salaries_full<- read.csv("ds_salaries_full.csv")
```
Page 1: Plots & a Value Box
=======================================================================
## Column {data-width=50%}
-----------------------------------------------------------------------
### Chart A: Salaries across Experience Levels
```{r Chart-a}
# Loading the needed or possibly needed libraries.
library(flexdashboard)
library(ggplot2)
library(plotly)
library(DT)
library(dplyr)
library(scales)
# Mapping experience level abbreviations to full names
experience_levels <- c(MI = "Mid-Level", SE = "Senior", EN = "Entry-Level", EX = "Executive")
# Adding a new column for the full experience level names
in_progress_ds_salaries_full <- in_progress_ds_salaries_full %>% mutate(level_of_experience = experience_levels[experience_level])
# Creating the boxplot with formatted axis labels and titles
experience_salary_plot <- plot_ly(data = in_progress_ds_salaries_full, x = ~level_of_experience, y= ~salary_in_usd, type = 'box', text = ~paste("Salary: $", formatC(salary_in_usd, format = "d", big.mark = ",")), marker = list(color = 'black')) %>% layout(yaxis = list(title = 'Salary in USD', tickformat = ',d'))
# Converting the plot to an interactive plotly object
experience_salary_plot
```
## Column {data-width=50%}
-----------------------------------------------------------------------
### Chart B: Comparing Salaries in the, US, Europe, Canada, and Other Regions from the .CSV
```{r categorize-regions}
## Loading the needed or possibly needed libraries.
library(flexdashboard)
library(ggplot2)
library(plotly)
library(DT)
library(dplyr)
library(scales)
# Defining lists of countries for each "region"
european_countries <- c("DE", "GB", "FR", "HU", "NL", "AT", "ES", "PT", "DK", "IT", "HR", "LU", "PL", "RO", "BE", "MT", "SI", "IE", "GR", "RU", "CH", "TR")
United_states <- c("US")
Canada <- c("CA")
Other_countries <- c("JP", "HN", "NZ", "IN", "PK", "CN", "AE", "MX", "NG", "IQ", "BR", "SG", "IL", "CL", "IR", "CO", "MD", "KE", "VN", "AS", "DZ", "MY", "AU", "UA","CZ", "EE")
# Categorizing company locations into regions
in_progress_ds_salaries_full$region <- ifelse(in_progress_ds_salaries_full$company_location %in% european_countries, "Europe", ifelse(in_progress_ds_salaries_full$company_location == "US", "United States", ifelse(in_progress_ds_salaries_full$company_location == "CA", "Canada", "Other")))
# Created new column formatted salary
in_progress_ds_salaries_full$salary_formatted <- formatC(in_progress_ds_salaries_full$salary_in_usd, format = "f", big.mark = ",")
# Creating a boxplot for salary comparison by region
plotly_region_salary_plot <- plot_ly(data = in_progress_ds_salaries_full, x = ~region, y = ~salary_in_usd, type = 'box', text = ~paste("Salary: $", formatC(salary_in_usd, format = "d", big.mark = ",")), marker = list(color = 'black')) %>% layout(yaxis = list(title = 'Salary in USD', tickformat = ',d'))
# Using plotly to make an interactive plot
plotly_region_salary_plot
```
-----------------------------------------------------------------------
### Chart C: Value Box for Average Salary
```{r calculate-average-salary}
# Calculating the average salary then rounding it
average_salary <- round(mean(in_progress_ds_salaries_full$salary_in_usd, na.rm = TRUE), 2)
```
```{r value-box-average-salary}
# Loading in needed packages
library(flexdashboard)
# Creating a formatted string for the value box
formatted_avg_salary <- paste0("Average Salary (USD):$", formatC(average_salary, format = "f", big.mark = ",", digits = 2))
# Displaying the average salary in a value box
valueBox(formatted_avg_salary, icon = "fa-dollar-sign", color = "primary")
```
Page 2: Charts on remote worker category data averages and ratios.
=======================================================================
## Column {data-width=50%}
-----------------------------------------------------------------------
### Chart D: Averaging the remote_ratio column, to see what percentage of all employees can work remotely (Partially or Fully) per year.
This chart represents the average remote work ratio for all employees working at Company ABC. If the (ARWR) is 65%, that means 65% of all employees had some sort of remote work ability for that year.
```{r aggregate-remote-data-1}
# Loading the needed or possibly needed libraries.
library(flexdashboard)
library(ggplot2)
library(plotly)
library(DT)
library(dplyr)
library(scales)
# Aggregating data to find the average remote work ratio per year and round the amount
remote_trends1 <-in_progress_ds_salaries_full %>% group_by(work_year) %>% summarise(average_remote_ratio = mean(remote_ratio, na.rm = TRUE)) %>% mutate(average_remote_ratio = round(average_remote_ratio, 0))
remote_trends1$work_year <- factor(remote_trends1$work_year, labels = c("2020", "2021", "2022"))
# Creating a bar chart with custom labels and formatting
remote_trend_plot1 <- ggplot(remote_trends1, aes(x=work_year, y=average_remote_ratio, fill=work_year)) + geom_bar(stat="identity") + scale_y_continuous(labels = scales::label_number(suffix = "%"),limits = c(0, 100), breaks = seq(0, 100, by = 10)) + labs(title="Average Remote Work Ratio for All Employees By Year", x="Year", y="(ARWR) Average Remote Work Ratio") + theme(axis.title.y = element_text(size = 14, angle = 90, vjust = 0.5, color = "black", margin = margin(t = 0, r = 20, b = 0, l = 0)), axis.text.y = element_text(hjust = 1), plot.title = element_text(hjust = 0.5))
# converting the GGPLOT chart into a plotly object that I can customize
ggplotly(remote_trend_plot1) %>% layout(hoverlabel = list(bgcolor = "black"), hovermode = 'closest') %>% style(hovertemplate = 'Year: %{x}<br>Average Remote Work Ratio (%): %{y}')
```
## Column {data-width=50%}
-------------------------------------------------------------------
### Chart E: Differences in Remote Work Categories By Year
This chart shows how all employees fall into the three remote work categories.
```{r aggregate-remote-data-2}
# Loading all library packages I might use:
library(flexdashboard)
library(ggplot2)
library(plotly)
library(DT)
library(dplyr)
library(scales)
# Prepare the data for the area chart:
remote_data_area_chart <- in_progress_ds_salaries_full %>% group_by(work_year, remote_ratio) %>% summarise(employee_count = n(), .groups = "drop") %>% left_join(in_progress_ds_salaries_full %>% group_by(work_year) %>% summarise(year_total = n(), .groups = "drop"), by = "work_year" ) %>% mutate(percent = round((employee_count / year_total) * 100, 1)) %>% ungroup()
# Adjust the percent data to be a proportion and calculate the cumulative percentage:
remote_data_area_chart <- remote_data_area_chart %>% mutate(percent = percent /100) %>% arrange(work_year, desc(remote_ratio)) %>% group_by(work_year) %>% mutate(cum_percent = cumsum(percent), text_position = if_else(remote_ratio == 0, percent / 2, lag(cum_percent, default = first(cum_percent)) - percent / 2)) %>% ungroup()
# Creating the area chart with text annotations for each category and year.
final_area_chart <- ggplot(data=remote_data_area_chart, aes(x=work_year, y=percent, fill=as.factor(remote_ratio),)) + geom_area(position = 'stack', alpha = 0.7) + geom_text(data=subset(remote_data_area_chart, work_year == 2020 & remote_ratio == 0), aes(label = scales::percent(percent), y=text_position), size = 5, color = "black", nudge_x = 0.1, nudge_y = 0.84, vjust = 0.5) + geom_text(data=subset(remote_data_area_chart, work_year == 2020 & remote_ratio == 50), aes(label = scales::percent(percent), y=text_position), size = 5, color = "black", nudge_x = 0.1, nudge_y = 0.22, vjust = 0.5) + geom_text(data = subset(remote_data_area_chart, work_year == 2020 & remote_ratio == 100), aes(label = scales::percent(percent), y=text_position), size = 5, color = "black", nudge_x = 0.1, nudge_y = -0.15, vjust = 0.5) + geom_text(data = subset(remote_data_area_chart, work_year == 2021 & remote_ratio == 0), aes(label = scales::percent(percent), y=text_position), size = 5, color = "black", nudge_y = 0.87, vjust = 0.5) + geom_text(data = subset(remote_data_area_chart, work_year == 2021 & remote_ratio == 50), aes(label = scales::percent(percent), y=text_position), size = 5, color = "black", nudge_y = 0.23, vjust = 0.5) + geom_text(data = subset(remote_data_area_chart, work_year == 2021 & remote_ratio == 100), aes(label = scales::percent(percent), y=text_position), size = 5, color = "black", nudge_y = -0.16, vjust = 0.5) + geom_text(data = subset(remote_data_area_chart, work_year == 2022 & remote_ratio ==0), aes(label=scales::percent(percent), y = text_position), size = 5, color = "black", nudge_x = -0.1, nudge_y = 0.83, vjust = 0.5) + geom_text(data = subset(remote_data_area_chart, work_year == 2022 & remote_ratio == 50), aes(label = scales::percent(percent), y = text_position), size = 5, color = "black", nudge_x = -0.1, nudge_y = 0.02, vjust = 0.5) + geom_text(data = subset(remote_data_area_chart, work_year == 2022 & remote_ratio == 100), aes(label = scales::percent(percent), y = text_position), size = 5, color = "black", nudge_x = -0.1, nudge_y = -0.255, vjust = 0.5) + scale_fill_manual(values = c("0"="red", "50"="yellow", "100"="green"), labels = c("No Remote", "Partially Remote", "Fully Remote"), name = "Remote Work Category") + scale_y_continuous(labels = NULL) + scale_x_continuous(breaks = unique(remote_data_area_chart$work_year)) + labs(title = "Trend of all Remote Work Categories Over the Years", x="", y="Percentages out of all Employees", fill = "Remote Ratio") + theme_minimal() + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1), legend.position = "right")
# Changing the GGPLOT chart into a plotly chart
ggplotly(final_area_chart, tooltip = NULL)
```
Page 3: Interactive Data Tables on Job Titles, Salaries, and Job Growth by Year
=======================================================================
## Column {data-width=50%}
-----------------------------------------------------------------------
### Chart F: Interactive Table for Job Title counts & Their Salaries (just as an extra snip of info). {data-height=500}
This table shows each Job Title count in descending order (largest to smallest).
```{r interactive-table-Counts-of-Job-Titles-&-Salaries-by-Job-Title}
# Loading needed library packages
library(flexdashboard)
library(ggplot2)
library(plotly)
library(DT)
library(dplyr)
library(scales)
# Aggregating data to find the average salaries by job title, and then round and arranging them
salary_by_job <- in_progress_ds_salaries_full %>% group_by(job_title) %>% summarise(average_salary_in_usd = round(mean(salary_in_usd, na.rm = TRUE), 0), count = n()) %>% arrange(desc(count))
# Creating an interactive table with the aggregated data
datatable(salary_by_job, options = list(pageLength = 10, lengthMenu = list(c(10, 25, -1), c('10', '25','All'))))
```
## Column {data-width=50%}
--------------------------------------------------------------------
### Chart G: Interactive Table for which area has seen the most growth over the years.{data-height=500}
This table shows the yearly growth comparison by Job Area. Percent Growth = (Final Value - Initial Value / Initial Value) X 100
```{r interactive-table-job-area-growth-year-to-year}
# Loading needed library packages
library(flexdashboard)
library(ggplot2)
library(plotly)
library(DT)
library(dplyr)
library(scales)
# Grouping data by year and job group to count the number of jobs in each category
counts_by_year <- in_progress_ds_salaries_full %>% group_by(work_year, job_group) %>% summarise(count = n(), .groups = "drop")
# Calculating the year-over-year growth for each job group
growth_by_year <-counts_by_year %>% arrange(job_group, work_year) %>% group_by(job_group) %>% mutate(growth = (count / lag(count) -1) * 100) %>% mutate(growth = if_else(is.na(growth), NA_real_, round(growth, 2)))
# Merging counts with growth rates
job_growth_with_growth <- counts_by_year %>% left_join(growth_by_year, by = c("work_year", "job_group")) %>% select(work_year, job_group, count = count.x, growth) %>% arrange(work_year, desc(count))
# Creating an interactive table with the merged data
datatable(job_growth_with_growth, options = list(pageLength = 5, lengthMenu = list(c(5, 10, -1), c('5', '10', 'All')), columnDefs = list(list(title = 'Growth from previous year (%)', targets = 4))))
## BELOW this, I Added a note to the bottom of the table to give more information
```
_Note: The "Growth (%)" column represents the year-over-year percentage change in job counts per that Job Area._